Contents

import pandas as pd
import plotly.express as px

# Read the water-access / life-expectancy table and discard every row that
# has a missing value in any column.
pd_merge = pd.read_csv('datasets/water_life.csv').dropna()

# Animated scatter: one frame per 'Year', one coloured marker per country,
# with fixed axis ranges so the frames are directly comparable.
fig = px.scatter(
    pd_merge,
    x='Water',
    y='Life',
    animation_frame='Year',
    animation_group='Country Name',
    color='Country Name',
    hover_name='Country Name',
    size_max=55,
    range_x=[1, 100],
    range_y=[0, 90],
)
fig.show()
import pandas as pd
import plotly.express as px

# Load the CSV file
data = pd.read_csv('datasets/water_life.csv')

# Build the cleaned frame in one pass:
#   1. coerce 'Water' and 'Life' to numeric (unparseable values become NaN),
#   2. round 'Water' to the nearest full percentage,
#   3. drop rows where either value is missing.
# DataFrame.assign returns a new frame, so no column is ever written onto a
# slice of `data` (the cause of pandas' SettingWithCopyWarning), and dropping
# NaNs *after* the coercion also removes values that failed to parse.
data_clean = data.assign(
    Water=pd.to_numeric(data['Water'], errors='coerce').round(),
    Life=pd.to_numeric(data['Life'], errors='coerce'),
).dropna(subset=['Water', 'Life'])

# Group by year and rounded 'Water' value and take the mean of 'Life' for each group
grouped_data = data_clean.groupby(['Year', 'Water'], as_index=False).agg({'Life': 'mean'})

# Create an animated line plot using Plotly with a slider for years and a purple line
fig = px.line(
    grouped_data,
    x='Water',
    y='Life',
    animation_frame='Year',
    range_x=[0, 100],  # Fixed x-axis range so frames are comparable
    range_y=[0, 100],  # Fixed y-axis range so frames are comparable
    labels={'Water': 'Access to Water (%)', 'Life': 'Life Expectancy'},
    title='Life Expectancy vs. Access to Water Over the Years',
    color_discrete_sequence=['purple'],
)

# Show the plot
fig.show()
/tmp/ipykernel_188032/1790060808.py:11: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_188032/1790060808.py:12: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_188032/1790060808.py:15: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy